#' ---
#' title: "Campaign Communication and Legislative Leadership (PSRM)"
#' subtitle: "00_reproduce_analysis.R"
#' author: "Authors: Stefan Mueller and Naofumi Fujimura"
#' ---

# Run this script in order to reproduce the entire data preparation
# and analysis

# The following function checks which of the required packages are
# installed on your system, installs the missing packages
# (if install = TRUE), and loads all packages

# Install (optionally) and attach every package used by the replication
# scripts.
#
# Arguments:
#   install  If TRUE, required packages that cannot be found in the library
#            paths are installed with install.packages() before loading.
#            If FALSE (default), installation is skipped.
#
# Returns (invisibly) the list of values produced by the library() calls.
# The function is called for its side effects: installing and attaching
# packages. It stops with an error that names every required package that
# is still unavailable, instead of failing on the first library() call.
install_required_packages <- function(install = FALSE) {
    required_packages <- c("quanteda", "dplyr", "stringr", "readr", 
                           "quanteda.textmodels", 
                           "quanteda.textstats", "rmarkdown",
                           "rio", "scales", "ggplot2", "xtable", 
                           "readstata13", "MASS", "broom", "texreg", 
                           "tidyr", "forcats", 
                           "Hmisc", "fixest", "marginaleffects", 
                           "mltest",
                           "clarify", "sandwich", 
                           "modelsummary")
    
    # Return the subset of `pkgs` that is not installed. find.package() is
    # the lookup suggested by ?installed.packages, which itself can be slow
    # and is not meant for testing whether a package is available.
    find_missing <- function(pkgs) {
        is_installed <- vapply(
            pkgs,
            function(pkg) length(find.package(pkg, quiet = TRUE)) > 0,
            logical(1)
        )
        pkgs[!is_installed]
    }
    
    if (install) {
        new_packages <- find_missing(required_packages)
        if (length(new_packages) > 0) {
            # In a non-interactive session (e.g. Rscript) R cannot prompt
            # for a mirror, and the placeholder "@CRAN@" makes
            # install.packages() abort. Substitute the CRAN cloud mirror
            # in that case only; configured repositories are left as is.
            repos <- getOption("repos")
            if (!interactive() && "@CRAN@" %in% repos) {
                repos[repos == "@CRAN@"] <- "https://cloud.r-project.org"
            }
            install.packages(new_packages, repos = repos)
            
            # install.packages() only warns when a package fails to
            # install, so report what was actually installed.
            installed_now <- setdiff(new_packages, find_missing(new_packages))
            if (length(installed_now) > 0) {
                cat("Installed packages:", 
                    paste(installed_now, collapse = ", "), "\n")
            }
        } else {
            cat("All required packages are already installed.\n")
        }
    } else {
        cat("Installation skipped. Loading libraries only.\n")
    }
    
    # Fail early with the complete list of unavailable packages.
    still_missing <- find_missing(required_packages)
    if (length(still_missing) > 0) {
        hint <- if (install) {
            "Installation failed; see the messages above."
        } else {
            "Re-run with install = TRUE or install them manually."
        }
        stop("The following required packages are not installed: ",
             paste(still_missing, collapse = ", "), ". ", hint,
             call. = FALSE)
    }
    
    invisible(lapply(required_packages, function(pkg) {
        library(pkg, character.only = TRUE, quietly = TRUE)
    }))
}

# Package setup for the whole pipeline: install any required packages that
# are missing and then load all of them. Change to install = FALSE to skip
# the installation step and only load the packages.
install_required_packages(install = TRUE)

# The following packages are used in at least
# one of the scripts and must be installed 
# (package version behind each library() call)

# library(quanteda)            # CRAN v3.3.1
# library(dplyr)               # CRAN v1.1.2
# library(stringr)             # CRAN v1.5.0
# library(readr)               # CRAN v2.1.4
# library(quanteda.textmodels) # CRAN v0.9.6
# library(quanteda.textstats)  # CRAN v0.96.3
# library(rio)                 # CRAN v0.5.29
# library(scales)              # CRAN v1.2.1
# library(ggplot2)             # CRAN v3.4.2
# library(xtable)              # CRAN v1.8-4
# library(mltest)              # CRAN v1.0.1
# library(readstata13)         # CRAN v0.10.1
# library(MASS)                # CRAN v7.3-60
# library(broom)               # CRAN v1.0.5
# library(texreg)              # CRAN v1.38.6
# library(tidyr)               # CRAN v1.3.0
# library(forcats)             # CRAN v1.0.0
# library(Hmisc)               # CRAN v5.1-0
# library(fixest)              # CRAN v0.11.1
# library(marginaleffects)     # CRAN v0.16.0
# library(clarify)             # CRAN v0.2.0
# library(sandwich)            # CRAN v3.0-2
# library(modelsummary)        # CRAN v1.4.1


# If the code does not run, one or more packages may have been 
# updated, which may result in errors or conflicts. You can solve this issue
# by installing the package version listed above or by using the 
# groundhog package:
# after installing groundhog using install.packages("groundhog")
# change library(name_of_package) to
# groundhog::groundhog.library(name_of_package, date = "2024-01-31")
# Instead of adjusting the library() function for each package, 
# you can adjust them all at once using
# the following syntax:
# groundhog.library("library('pkgA')
#                   library('pkgB')
#                   library('pkgC')", date = "2024-01-31")
# More details are available at: https://groundhogr.com/using/


## Load R script with function for custom ggplot2 theme
## (all file names below are relative paths, so the working directory
## must be the folder that contains these scripts)
source("function_theme_base.R")

## Step 01: run R script
source("01_validate_statement_segmentation.R")
## Render the same R script as a markdown log report (HTML document)
## NOTE(review): render() presumably executes the script again while
## building the report, so each step runs twice -- confirm this is intended
rmarkdown::render("01_validate_statement_segmentation.R", 
                  output_format = "html_document")

## Step 02: run R script
source("02_prepare_bert_data.R")
## Render the same R script as a markdown log report (HTML document)
rmarkdown::render("02_prepare_bert_data.R", 
                  output_format = "html_document")


## Steps 03a and 03b are Jupyter notebooks; they are not executed by
## this R script

## 03a_finetune_transformer.ipynb
## Running this Python notebook will take many hours
## (over 3.5 hours on a Mac Studio M2 Max with 192 GB RAM)

## 03b_predict_all_sentences.ipynb
## Running this notebook takes around 15 minutes
## (on a Mac Studio M2 Max with 192 GB RAM)

## Steps 04-08 follow the same pattern: each script is first run with
## source() and then rendered to an HTML log report with rmarkdown::render().
## The steps are executed strictly in the numbered order shown here.

## Step 04: run R script
source("04_clean_transformers_output.R")

## Render R script as a markdown log report
rmarkdown::render("04_clean_transformers_output.R", 
                  output_format = "html_document")


## Step 05: run R script
source("05_performance_all_classifiers.R")

## Render R script as a markdown log report
rmarkdown::render("05_performance_all_classifiers.R",
                  output_format = "html_document")

## Step 06: run R script
source("06_merge_sources.R")

## Render R script as a markdown log report
rmarkdown::render("06_merge_sources.R", 
                  output_format = "html_document")


## Step 07: run R script
source("07_keyness_and_wordfish.R")

## Render R script as a markdown log report
rmarkdown::render("07_keyness_and_wordfish.R", 
                  output_format = "html_document")


## Step 08: run R script
source("08_analysis.R")

## Render R script as a markdown log report
rmarkdown::render("08_analysis.R", 
                  output_format = "html_document")
